library(data.table)
library(ggplot2)
library(caret)
library(nnet)
library(pls)
library(FactoMineR)
library(qqman)
library(mixOmics)
data(Phenotype)
data(Genomic)
data("localisation")
pheno <- as.data.table(Phenotype)
loc_df<- as.data.table(localisation)
geno<- as.data.table(Genomic)
geno[, ID := rownames(Genomic)]
pheno[, ID := rownames(Phenotype)]
merged<- merge(pheno, loc_df[,list(Population,ID)], by = "ID")
merged<- merge(merged, geno, by = "ID")
df <- as.data.table(merged)
phenos <- names(df)[sapply(df, is.numeric)]
phenos <- setdiff(phenos, c("class_index","Lat","Lgn","Elev","ID"))
get_r2 <- function(var) {
form<- as.formula(paste(var, "~ Population + Elev + Lat + Lgn "))
mod<- lm(form, data = df)
summary(mod)$r.squared
}
r2_values<- data.table(
phenotype = phenos,
r2= sapply(phenos, get_r2)
)
Error in eval(predvars, data, env) : object 'Elev' not found
ggplot(r2_values, aes(x = reorder(phenotype, r2), y = r2)) +
geom_point(size = 3) +
geom_segment(aes(x = phenotype, xend = phenotype, y = 0, yend = r2)) +
coord_flip() +
ylab("R² lm(Phénotype ~ Population)") +
xlab("Caractère phénotypique") +
theme_minimal()
Groupes de variables : chromosomes, phénotypes, populations
constructions des axes : chromosomes => on sépare les individus selon leur distances génomiques et on projette en supplémentaires les phénotypes / population
on pourra voir quels chromosomes ne sont pas dutout corrélés avec des traits phénotypiques
geno_mat <- as.matrix(geno)
coords <- do.call(rbind, strsplit(colnames(geno), "_"))
Warning: number of columns of result is not a multiple of vector length (arg 1)
chrom <- as.factor(coords[,1])
pos <- as.numeric(coords[,2])
Warning: NAs introduced by coercion
pca <- PCA(merged[,2:217022], quali.sup=21 ,quanti.sup=(2:20) )
plot(pca,choix="ind",habillage=21)
plot(pca,choix="var", #invisible = c("quali","var"),
label = c("quanti.sup"),
select = "coord 10" )
plotIndiv(result.spca.multi)
plotVar(result.spca.multi)
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
Please use tidy evaluation idioms with `aes()`.
See also `vignette("ggplot2-in-packages")` for more information.
# extract the variables used to construct the first PC
selectVar(result.spca.multi, comp = 1)$name
[1] "Chr02_6422161" "Chr02_6427114" "Chr02_6427264" "Chr02_6416381" "Chr13_15625954"
[6] "Chr13_15574007" "Chr13_15627568" "Chr07_7181966" "Chr13_15626544" "Chr02_6382504"
[11] "Chr13_15635566" "Chr02_6422861" "Chr02_6423252" "Chr02_6426926" "Chr12_3267410"
[16] "Chr12_3264433" "Chr12_3267439" "Chr12_3264479" "Chr10_20127228" "Chr01_42022246"
[21] "Chr17_7247760" "Chr10_20121818" "Chr10_20126974" "Chr17_5836683" "Chr02_6325961"
[26] "Chr03_18516527" "Chr06_22417885" "Chr02_6332720" "Chr11_17347389" "Chr06_24641715"
[31] "Chr11_17347361" "Chr14_13405721" "Chr11_17347960" "Chr01_14722117" "Chr06_24641246"
[36] "Chr03_11418460" "Chr03_11422148" "Chr01_14722377" "Chr03_15711505" "Chr11_4789295"
[41] "Chr01_14717384" "Chr15_9806504" "Chr17_14258124" "Chr17_14265429" "Chr07_12991228"
[46] "Chr07_12991382" "Chr01_8617466" "Chr06_20423555" "Chr07_12997322" "Chr01_14708394"
# depict weight assigned to each of these variables
plotLoadings(result.spca.multi, method = 'mean', contrib = 'max')
pls.result <- mixOmics::pls(X, Y)
Warning: the standard deviation is zero
plotIndiv(pls.result)
plotIndiv(pls.result, group = merged$Population,
rep.space = 'XY-variate',
ellipse = TRUE, # plot using the ellipses
legend = TRUE)
head(merged[,2:21])
plotIndiv(spls.result, group = merged$Population,
rep.space = 'XY-variate',
ellipse = TRUE, # plot using the ellipses
legend = TRUE)
# Identifier les coefficients non nuls
nonzero_idx <- loadX != 0
df_nonzero_X <- data.frame(
SNP = names(loadX)[nonzero_idx],
loading = loadX[nonzero_idx]
)
coords <- do.call(rbind, strsplit(rownames(df_nonzero_X), "_"))
coords[,1] <- as.numeric(sub("Chr", "", coords[,1]))
pos <- as.numeric(coords[,2])
merged_nonzero <- cbind(coords,df_nonzero_X)
merged_nonzero$"2" <- as.numeric(merged_nonzero$"2")
merged_nonzero$"1" <- as.numeric(merged_nonzero$"1")
merged_nonzero$loading <- abs(merged_nonzero$loading)
manhattan(merged_nonzero, chr = "1", bp = "2", p = "loading",snp = "SNP",
col = c("blue4", "orange3"), # couleurs alternées par chromosome
cex = 0.6) # taille des points
# use the mirna, mrna and protein expression levels as predictive datasets
# note that each dataset is measured across the same individuals (samples)
X1<- as.matrix(merged[,23:217022])
X1 <- X1[, apply(X1, 2, sd) != 0]
X2 <- as.matrix(merged[,2:21])
Y <- as.factor(merged$Population)
X <- list(geno = X1, pheno = X2)
result.diablo.tcga <- block.plsda(X, Y) # run the method
plotIndiv(result.diablo.tcga) # plot the samples
plotVar(result.diablo.tcga) # plot the variables
Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
Please use tidy evaluation idioms with `aes()`.
See also `vignette("ggplot2-in-packages")` for more information.
plotIndiv(result.diablo.tcga,
rep.space = 'XY-variate',
ellipse = TRUE, # plot using the ellipses
legend = TRUE)
# set the number of features to use for the X datasets
list.keepX = list(geno = c(rep(10000,5)), pheno = c(rep(20,5)))
list.keepX
$geno
[1] 10000 10000 10000 10000 10000
$pheno
[1] 20 20 20 20 20
# run the method
result.sparse.diablo.tcga <- block.splsda(X, Y, keepX = list.keepX, ncomp = 5)
# plot the contributions of each feature to each dimension
plotLoadings(result.sparse.diablo.tcga, ncomp = 5)
plotIndiv(result.sparse.diablo.tcga,ellipse = TRUE) # plot the samples
plotVar(result.sparse.diablo.tcga) # plot the variables
library(spls)
y <- merged$CIRC2009
y <- as.numeric(y)
X <- as.matrix(merged[,23:217022])
X <- X[, apply(X, 2, sd) != 0]
# Split stratifié
# Ici, on fait un split simple car phénotype continu
train_index <- createDataPartition(y, p = 0.9, list = FALSE)
X_train <- X[train_index, ]
var_threshold <- 1e-8
nonzero_cols <- apply(X_train, 2, var) > var_threshold
length(nonzero_cols)
[1] 206066
X_train <- X_train[, nonzero_cols]
X_test <- X[-train_index, ]
X_test <- X_test[, nonzero_cols]
y_train <- y[train_index]
y_test <- y[-train_index]
eta = seq(0.1,0.9,0.1)
K = c(1:5)
cv.spls( X_train, y_train, fold=5, K, eta, kappa=0.5, select="pls2", fit="simpls",scale.x=FALSE, scale.y=FALSE, plot.it=TRUE )
eta = 0.1
eta = 0.2
eta = 0.3
eta = 0.4
eta = 0.5
eta = 0.6
eta = 0.7
eta = 0.8
eta = 0.9
Optimal parameters: eta = 0.2, K = 4
model <- spls(X_train, y_train, K = 4, eta =0.2 , kappa=0.5, select="pls2", fit="simpls",
scale.x=FALSE, scale.y=FALSE, eps=1e-4, maxstep=100, trace=FALSE)
ped <- predict.spls( model, X_test, type = c("fit","coefficient"))
ped
[,1]
[1,] 5.846174
[2,] 6.110574
[3,] 5.941446
[4,] 5.787127
[5,] 5.939230
[6,] 5.916100
[7,] 6.413289
[8,] 6.578143
[9,] 6.180510
[10,] 5.820094
[11,] 6.049748
[12,] 7.130326
[13,] 7.241039
[14,] 7.406019
[15,] 7.255352
[16,] 7.195316
[17,] 6.252328
[18,] 5.839857
[19,] 6.072287
f <- spls::ci.spls( model, coverage=0.95, B=1000,
plot.it=FALSE, plot.fix="y",
plot.var=NA, K=model$K, fit=model$fit )
10 % completed...
20 % completed...
30 % completed...
40 % completed...
50 % completed...
60 % completed...
70 % completed...
80 % completed...
90 % completed...
100 % completed...
names(f)
[1] "cibeta" "betahat" "lbmat" "ubmat"
nonzero_idx <- which(f$betahat != 0)
betahat_nonzero <- f$betahat[nonzero_idx]
length(nonzero_idx)
[1] 49122
res = MFA(geno_mat, group=c(25974,16646,12284,11790,14769,17004,8327,14040,9840,15964,7483,7403,8040,11123,8486,7629,6747,8267,4876,308), type=c(rep("s",20)), ncp=5, name.group=c("Chr01","Chr02","Chr03","Chr04", "Chr05","Chr06","Chr07","Chr08","Chr09","Chr10", "Chr11","Chr12","Chr13", "Chr14","Chr15","Chr16","Chr17","Chr18","Chr19", "scaffold"))
dim(geno_mat)
[1] 199 217000
\[VIP_{j} = \sqrt{ p \frac{\displaystyle \sum_{h=1}^{A} SS_{Y,h}\frac{w_{jh}^{2}}{\lVert w_{h} \rVert^{2}}}{\displaystyle \sum_{h=1}^{A} SS_{Y,h}} }\]
set.seed(123)
# Paramètres
n_iter <- 400
subset_size <- 10000 # nombre de SNP aléatoires par itération
ncomp_candidates <- 1:5 # candidats pour ncomp
nfold_outer <- 5
nfold_inner <- 10
results_perf
vip_perf<- results_perf[, .(R2_test_mean = mean(R2_test)), by = iter]
vip_perf_fold<- results_perf[, .(R2_test_mean = mean(R2_test)), by = fold]
ggplot(results_perf[1:1000,], aes(x = factor(iter), y = R2_test)) +
geom_violin(fill = "skyblue", color = "black") +
labs(x = "Itération", y = expression(R^2), title = "R² 10-outer-fold ") +
theme_minimal()
mean(vip_perf_fold$R2_test_mean)
[1] 0.4547343
vip_perf_fold
library(data.table)
library(ggplot2)
# Calcul du VIP moyen par SNP
vip_mean<- results_vip[, .(VIP_mean = mean(VIP),SNP_count = .N), by = SNP]
# Sélection des 30 SNP ayant le VIP moyen le plus élevé
topN<- vip_mean[order(-VIP_mean)][0:5000, SNP]
df_topNPLS<- results_vip[SNP %in% topN]
df_topNPLS <- merge(df_topNPLS, vip_mean[, .(SNP, SNP_count)], by = "SNP")
df_topNPLS$SNP_count <- df_topNPLS$SNP_count/10
ggplot(df_topNPLS, aes(x = reorder(SNP,VIP), y = VIP, fill = SNP_count)) +
geom_violin(trim = FALSE) +
geom_boxplot(width = 0.1, outlier.size = 0.5) +
scale_fill_gradient(low = "lightblue", high = "darkblue") +
theme_bw() +
theme(
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
) +
labs(
x = "SNP",
y = "VIP",
title = "Distribution du VIP pour les n = 50 SNP les plus importants",
fill = "Nbr de selection du SNP"
)
hist(vip_mean$VIP_mean, breaks = seq(min(vip_mean$VIP_mean), max(vip_mean$VIP_mean), length.out = 50),
main = "Distribution des scores VIP moyens",
xlab = "scores VIP moyens", col = "lightblue", border = "white")
library(glmnet)
# Assurer que y est numérique et X est une matrice
y <- as.numeric(pheno$CIRC2009)
X <- as.matrix(geno)
train_index <- createDataPartition(y, p = 0.8, list = FALSE)
X_train <- X[train_index, ]
X_test <- X[-train_index, ]
y_train <- y[train_index]
y_test <- y[-train_index]
# Ajustement du Lasso avec validation croisée pour choisir lambda
set.seed(123)
cv_lasso <- cv.glmnet(X_train, y_train, alpha = 1, nfolds = 5)
# Afficher la meilleure valeur de lambda
best_lambda <- cv_lasso$lambda.min
print(best_lambda)
[1] 0.01043408
# Tracer l'erreur de validation croisée
plot(cv_lasso)
# Ajuster le modèle final avec lambda optimal
lasso_model <- glmnet(X_train, y_train, alpha = 1, lambda = best_lambda)
# Extraire les coefficients
coef(lasso_model)
217001 x 1 sparse Matrix of class "dgCMatrix"
s0
(Intercept) 6.531206
Chr01_13832 .
Chr01_14786 .
Chr01_14892 .
Chr01_16212 .
Chr01_16229 .
Chr01_16423 .
Chr01_60631 .
Chr01_60791 .
Chr01_61014 .
Chr01_64402 .
Chr01_64685 .
Chr01_64771 .
Chr01_64878 .
Chr01_64939 .
Chr01_66095 .
Chr01_66700 .
Chr01_66763 .
Chr01_66768 .
Chr01_67017 .
Chr01_67266 .
Chr01_67274 .
Chr01_67423 .
Chr01_67708 .
Chr01_68386 .
Chr01_69150 .
Chr01_88238 .
Chr01_88315 .
Chr01_88376 .
Chr01_88448 .
Chr01_88595 .
Chr01_89812 .
Chr01_90924 .
Chr01_91126 .
Chr01_91137 .
Chr01_91166 .
Chr01_91710 .
Chr01_91776 .
Chr01_92124 .
Chr01_106725 .
Chr01_106745 .
Chr01_106877 .
Chr01_107077 .
Chr01_108678 .
Chr01_108877 .
Chr01_109763 .
Chr01_109880 .
Chr01_110848 .
Chr01_111824 .
Chr01_111900 .
Chr01_113593 .
Chr01_113967 .
Chr01_114929 .
Chr01_115102 .
Chr01_115122 .
Chr01_115855 .
Chr01_115918 .
Chr01_116033 .
Chr01_116138 .
Chr01_116149 .
Chr01_116496 .
Chr01_116529 .
Chr01_116673 .
Chr01_116778 .
Chr01_116825 .
Chr01_116838 .
Chr01_117019 .
Chr01_117028 .
Chr01_118375 .
Chr01_118879 .
Chr01_119119 .
Chr01_119357 .
Chr01_119360 .
Chr01_119712 .
Chr01_119811 .
Chr01_119899 .
Chr01_120015 .
Chr01_122193 .
Chr01_123846 .
Chr01_124409 .
Chr01_124448 .
Chr01_126036 .
Chr01_126037 .
Chr01_126254 .
Chr01_126830 .
Chr01_127858 .
Chr01_128069 .
Chr01_128072 .
Chr01_128430 .
Chr01_128477 .
Chr01_131048 .
Chr01_131078 .
Chr01_131083 .
Chr01_131341 .
Chr01_133610 .
Chr01_133634 .
Chr01_133656 .
Chr01_133689 .
Chr01_133960 .
Chr01_134212 .
Chr01_134973 .
Chr01_135161 .
Chr01_135803 .
Chr01_136014 .
Chr01_136186 .
Chr01_136433 .
Chr01_139710 .
Chr01_150514 .
Chr01_150580 .
Chr01_161422 .
Chr01_162079 .
Chr01_162372 .
Chr01_162390 .
Chr01_163162 .
Chr01_163164 .
Chr01_164513 .
Chr01_165240 .
Chr01_165266 .
Chr01_165518 .
Chr01_167741 .
Chr01_167802 .
Chr01_168463 .
Chr01_168500 .
Chr01_168539 .
Chr01_168555 .
Chr01_168732 .
Chr01_168880 .
Chr01_168957 .
Chr01_169026 .
Chr01_169418 .
Chr01_169527 .
Chr01_169536 .
Chr01_169673 .
Chr01_169812 .
Chr01_169931 .
Chr01_169961 .
Chr01_170073 .
Chr01_170335 .
Chr01_172462 .
Chr01_172544 .
Chr01_172599 .
Chr01_172601 .
Chr01_173004 .
Chr01_173300 .
Chr01_173323 .
Chr01_173344 .
Chr01_173517 .
Chr01_199064 .
Chr01_199073 .
Chr01_199186 .
Chr01_199221 .
Chr01_200793 .
Chr01_200830 .
Chr01_200869 .
Chr01_205964 .
Chr01_206007 .
Chr01_206052 .
Chr01_206157 .
Chr01_208376 .
Chr01_208379 .
Chr01_208419 .
Chr01_210339 .
Chr01_210368 .
Chr01_211650 .
Chr01_211674 .
Chr01_211752 .
Chr01_216506 .
Chr01_216601 .
Chr01_216642 .
Chr01_216682 .
Chr01_216759 .
Chr01_216812 .
Chr01_216982 .
Chr01_216997 .
Chr01_225239 .
Chr01_225272 .
Chr01_225350 .
Chr01_225519 .
Chr01_226505 .
Chr01_226558 .
Chr01_226733 .
Chr01_227542 .
Chr01_228264 .
Chr01_228643 .
Chr01_235333 .
Chr01_237242 .
Chr01_238136 .
Chr01_238138 .
Chr01_238244 .
Chr01_238261 .
Chr01_238318 .
Chr01_238385 .
Chr01_238620 .
Chr01_238676 .
Chr01_238947 .
Chr01_238986 .
Chr01_239362 .
Chr01_240009 .
Chr01_240404 .
Chr01_240464 .
Chr01_240674 .
Chr01_240878 .
Chr01_240926 .
Chr01_240929 .
Chr01_241113 .
Chr01_241164 .
Chr01_241173 .
Chr01_241615 .
Chr01_242818 .
Chr01_242922 .
Chr01_243597 .
Chr01_244158 .
Chr01_244205 .
Chr01_244323 .
Chr01_244703 .
Chr01_244738 .
Chr01_244876 .
Chr01_244896 .
Chr01_245368 .
Chr01_245496 .
Chr01_245546 .
Chr01_245763 .
Chr01_246874 .
Chr01_246913 .
Chr01_248096 .
Chr01_248167 .
Chr01_248173 .
Chr01_248283 .
Chr01_248882 .
Chr01_250235 .
Chr01_250243 .
Chr01_250308 .
Chr01_251674 .
Chr01_251859 .
Chr01_260199 .
Chr01_260293 .
Chr01_260338 .
Chr01_260558 .
Chr01_260760 .
Chr01_260785 .
Chr01_263292 .
Chr01_263493 .
Chr01_263517 .
Chr01_263775 .
Chr01_263790 .
Chr01_263877 .
Chr01_263967 .
Chr01_263976 .
Chr01_264811 .
Chr01_265716 .
Chr01_265878 .
Chr01_265895 .
Chr01_265946 .
Chr01_266320 .
Chr01_267203 .
Chr01_268640 .
Chr01_269409 .
Chr01_269683 .
Chr01_279781 .
Chr01_280963 .
Chr01_284001 .
Chr01_284068 .
Chr01_295470 .
Chr01_295580 .
Chr01_295700 .
Chr01_295779 .
Chr01_295781 .
Chr01_296958 .
Chr01_296972 .
Chr01_297053 .
Chr01_297140 .
Chr01_297206 .
Chr01_297435 .
Chr01_297946 .
Chr01_304505 .
Chr01_304529 .
Chr01_305327 .
Chr01_305804 .
Chr01_305993 .
Chr01_306028 .
Chr01_306036 .
Chr01_306063 .
Chr01_306198 .
Chr01_311559 .
Chr01_311801 .
Chr01_311878 .
Chr01_311937 .
Chr01_312041 .
Chr01_312082 .
Chr01_313076 .
Chr01_313112 .
Chr01_313469 .
Chr01_313562 .
Chr01_313569 .
Chr01_313712 .
Chr01_314006 .
Chr01_314057 .
Chr01_314065 .
Chr01_315360 .
Chr01_315901 .
Chr01_316697 .
Chr01_317341 .
Chr01_317695 .
Chr01_317729 .
Chr01_319056 .
Chr01_319083 .
Chr01_319868 .
Chr01_319888 .
Chr01_319915 .
Chr01_320492 .
Chr01_320502 .
Chr01_320576 .
Chr01_320789 .
Chr01_320797 .
Chr01_338679 .
Chr01_338687 .
Chr01_338761 .
Chr01_339402 .
Chr01_340601 .
Chr01_340608 .
Chr01_340653 .
Chr01_369736 .
Chr01_369774 .
Chr01_370205 .
Chr01_370217 .
Chr01_370278 .
Chr01_370607 .
Chr01_370664 .
Chr01_370778 .
Chr01_371488 .
Chr01_371644 .
Chr01_371838 .
Chr01_400068 .
Chr01_400143 .
Chr01_400299 .
Chr01_400340 .
Chr01_400526 .
Chr01_401736 .
Chr01_402285 .
Chr01_402306 .
Chr01_402361 .
Chr01_402378 .
Chr01_402891 .
Chr01_402910 .
Chr01_402963 .
Chr01_402975 .
Chr01_403053 .
Chr01_406351 .
Chr01_406432 .
Chr01_406987 .
Chr01_407317 .
Chr01_408455 .
Chr01_431702 .
Chr01_432558 .
Chr01_432562 .
Chr01_432602 .
Chr01_432797 .
Chr01_433004 .
Chr01_433056 .
Chr01_433109 .
Chr01_434874 .
Chr01_444758 .
Chr01_446327 .
Chr01_446560 .
Chr01_446565 .
Chr01_446856 .
Chr01_446894 .
Chr01_446916 .
Chr01_448313 .
Chr01_451121 .
Chr01_451462 .
Chr01_451737 .
Chr01_451976 .
Chr01_452075 .
Chr01_452106 .
Chr01_452963 .
Chr01_453086 .
Chr01_453180 .
Chr01_453186 .
Chr01_470430 .
Chr01_470456 .
Chr01_470478 .
Chr01_471384 .
Chr01_471736 .
Chr01_471754 .
Chr01_472207 .
Chr01_472809 .
Chr01_473343 .
Chr01_473391 .
Chr01_473691 .
Chr01_474251 .
Chr01_474308 .
Chr01_474316 .
Chr01_474759 .
Chr01_474777 .
Chr01_475150 .
Chr01_475418 .
Chr01_475445 .
Chr01_475465 .
Chr01_508685 .
Chr01_508828 .
Chr01_508875 .
Chr01_509166 .
Chr01_509417 .
Chr01_514737 .
Chr01_514962 .
Chr01_514981 .
Chr01_515044 .
Chr01_515196 .
Chr01_515306 .
Chr01_536676 .
Chr01_536704 .
Chr01_536746 .
Chr01_536827 .
Chr01_537154 .
Chr01_537704 .
Chr01_537731 .
Chr01_538073 .
Chr01_538209 .
Chr01_538541 .
Chr01_538817 .
Chr01_539555 .
Chr01_539765 .
Chr01_540424 .
Chr01_541402 .
Chr01_541442 .
Chr01_543284 .
Chr01_543303 .
Chr01_543345 .
Chr01_543348 .
Chr01_543945 .
Chr01_543970 .
Chr01_544090 .
Chr01_544686 .
Chr01_545628 .
Chr01_545704 .
Chr01_546448 .
Chr01_546819 .
Chr01_546836 .
Chr01_548065 .
Chr01_548074 .
Chr01_548116 .
Chr01_548128 .
Chr01_548636 .
Chr01_548666 .
Chr01_548679 .
Chr01_548853 .
Chr01_548856 .
Chr01_548897 .
Chr01_553635 .
Chr01_556076 .
Chr01_556248 .
Chr01_556289 .
Chr01_556295 .
Chr01_556505 .
Chr01_556601 .
Chr01_556625 .
Chr01_556655 .
Chr01_556725 .
Chr01_556811 .
Chr01_557461 .
Chr01_557631 .
Chr01_557732 .
Chr01_557840 .
Chr01_557848 .
Chr01_566889 .
Chr01_570769 .
Chr01_570774 .
Chr01_570874 .
Chr01_572406 .
Chr01_572439 .
Chr01_572479 .
Chr01_572518 .
Chr01_573503 .
Chr01_579281 .
Chr01_579332 .
Chr01_579609 .
Chr01_579610 .
Chr01_579889 .
Chr01_587929 .
Chr01_588113 .
Chr01_588167 .
Chr01_588178 .
Chr01_588228 .
Chr01_590503 .
Chr01_590525 .
Chr01_590538 .
Chr01_590772 .
Chr01_590777 .
Chr01_590808 .
Chr01_590919 .
Chr01_590982 .
Chr01_591222 .
Chr01_591245 .
Chr01_591360 .
Chr01_591495 .
Chr01_591514 .
Chr01_591573 .
Chr01_591609 .
Chr01_591640 .
..............................
........suppressing 216001 rows in show(); maybe adjust options(max.print=, width=)
..............................
s0
Chr19_15274658 .
Chr19_15276166 .
Chr19_15277295 .
Chr19_15277351 .
Chr19_15277398 .
Chr19_15277503 .
Chr19_15277526 .
Chr19_15277548 .
Chr19_15277652 .
Chr19_15277677 .
Chr19_15289229 .
Chr19_15289241 .
Chr19_15289370 .
Chr19_15289394 .
Chr19_15289784 .
Chr19_15291310 .
Chr19_15293126 .
Chr19_15294107 .
Chr19_15316651 .
Chr19_15316714 .
Chr19_15316792 .
Chr19_15316870 .
Chr19_15318948 .
Chr19_15318997 .
Chr19_15319097 .
Chr19_15319250 .
Chr19_15319253 .
Chr19_15319328 .
Chr19_15319495 .
Chr19_15334247 .
Chr19_15334261 .
Chr19_15334575 .
Chr19_15334608 .
Chr19_15334765 .
Chr19_15334817 .
Chr19_15335034 .
Chr19_15335055 .
Chr19_15335894 .
Chr19_15335986 .
Chr19_15340197 .
Chr19_15340381 .
Chr19_15341421 .
Chr19_15341474 .
Chr19_15341661 .
Chr19_15341678 .
Chr19_15342451 .
Chr19_15352241 .
Chr19_15352335 .
Chr19_15352484 .
Chr19_15352522 .
Chr19_15352573 .
Chr19_15352595 .
Chr19_15352735 .
Chr19_15353439 .
Chr19_15353476 .
Chr19_15353566 .
Chr19_15358298 .
Chr19_15359270 .
Chr19_15359502 .
Chr19_15360407 .
Chr19_15371485 .
Chr19_15372372 .
Chr19_15372396 .
Chr19_15372659 .
Chr19_15377848 .
Chr19_15377885 .
Chr19_15377905 .
Chr19_15377922 .
Chr19_15377959 .
Chr19_15377962 .
Chr19_15379530 .
Chr19_15379600 .
Chr19_15379829 .
Chr19_15380189 .
Chr19_15380807 .
Chr19_15380830 .
Chr19_15380872 .
Chr19_15380915 .
Chr19_15380923 .
Chr19_15380960 .
Chr19_15380997 .
Chr19_15382854 .
Chr19_15383050 .
Chr19_15383311 .
Chr19_15383473 .
Chr19_15383596 .
Chr19_15384099 .
Chr19_15384265 .
Chr19_15384433 .
Chr19_15384447 .
Chr19_15384486 .
Chr19_15384538 .
Chr19_15384621 .
Chr19_15384672 .
Chr19_15384675 .
Chr19_15384779 .
Chr19_15384826 .
Chr19_15384867 .
Chr19_15385000 .
Chr19_15385014 .
Chr19_15385381 .
Chr19_15386763 .
Chr19_15386818 .
Chr19_15386854 .
Chr19_15386969 .
Chr19_15418689 .
Chr19_15418730 .
Chr19_15418838 .
Chr19_15418958 .
Chr19_15419078 .
Chr19_15419690 .
Chr19_15419691 .
Chr19_15421100 .
Chr19_15421355 .
Chr19_15422439 .
Chr19_15422450 .
Chr19_15424787 .
Chr19_15424830 .
Chr19_15424897 .
Chr19_15424913 .
Chr19_15425552 .
Chr19_15434789 .
Chr19_15437784 .
Chr19_15437821 -0.033932369
Chr19_15437831 .
Chr19_15438414 .
Chr19_15438511 .
Chr19_15439971 .
Chr19_15440585 .
Chr19_15440619 .
Chr19_15440801 .
Chr19_15441107 .
Chr19_15441113 .
Chr19_15457182 .
Chr19_15458856 .
Chr19_15473567 .
Chr19_15473575 .
Chr19_15473715 .
Chr19_15473717 .
Chr19_15473794 .
Chr19_15476673 .
Chr19_15486585 .
Chr19_15486854 .
Chr19_15488374 .
Chr19_15488411 .
Chr19_15488990 .
Chr19_15490089 .
Chr19_15490794 .
Chr19_15490946 .
Chr19_15491401 .
Chr19_15491454 .
Chr19_15491703 .
Chr19_15491745 .
Chr19_15491772 .
Chr19_15491835 .
Chr19_15501531 .
Chr19_15501584 .
Chr19_15501764 .
Chr19_15503678 .
Chr19_15503679 .
Chr19_15503800 .
Chr19_15503823 .
Chr19_15503855 .
Chr19_15504437 .
Chr19_15504489 .
Chr19_15504754 .
Chr19_15504959 .
Chr19_15504976 .
Chr19_15505148 .
Chr19_15512415 .
Chr19_15512455 .
Chr19_15512728 .
Chr19_15512751 .
Chr19_15512804 .
Chr19_15512923 .
Chr19_15514260 .
Chr19_15514280 .
Chr19_15514315 .
Chr19_15514437 .
Chr19_15514445 .
Chr19_15514495 .
Chr19_15514675 .
Chr19_15514735 .
Chr19_15514739 .
Chr19_15514801 .
Chr19_15517327 .
Chr19_15517864 .
Chr19_15519861 .
Chr19_15520446 .
Chr19_15521481 .
Chr19_15521657 .
Chr19_15522415 .
scaffold_104_5033 .
scaffold_104_6585 .
scaffold_104_6597 .
scaffold_104_17593 .
scaffold_104_17629 .
scaffold_104_17813 .
scaffold_104_18346 .
scaffold_104_20116 .
scaffold_104_21900 .
scaffold_104_22333 .
scaffold_104_22340 .
scaffold_104_22697 .
scaffold_104_24631 .
scaffold_104_25465 .
scaffold_104_25504 .
scaffold_104_176220 .
scaffold_104_176221 .
scaffold_104_176303 .
scaffold_104_176365 .
scaffold_104_176406 .
scaffold_104_176500 .
scaffold_104_176531 .
scaffold_104_176588 .
scaffold_104_176762 .
scaffold_104_176819 .
scaffold_1865_26377 .
scaffold_1865_26378 .
scaffold_1865_27086 .
scaffold_1865_28208 .
scaffold_1865_28230 .
scaffold_1865_28679 .
scaffold_1865_28685 .
scaffold_1865_28747 .
scaffold_1865_40702 .
scaffold_1865_40712 .
scaffold_1865_40749 .
scaffold_1865_41180 .
scaffold_1865_41236 .
scaffold_1865_41275 .
scaffold_1865_41282 .
scaffold_1865_43041 .
scaffold_1865_43059 .
scaffold_25_72794 .
scaffold_25_72850 .
scaffold_25_72914 .
scaffold_25_73404 .
scaffold_25_73405 .
scaffold_25_73441 .
scaffold_25_73952 .
scaffold_25_74812 .
scaffold_25_74917 .
scaffold_25_74982 .
scaffold_25_76337 .
scaffold_25_76608 .
scaffold_25_76646 .
scaffold_25_76744 .
scaffold_25_86551 .
scaffold_25_92220 .
scaffold_25_92768 .
scaffold_25_110434 .
scaffold_25_110440 .
scaffold_25_110820 .
scaffold_25_110827 .
scaffold_25_120819 .
scaffold_25_121057 .
scaffold_25_121072 .
scaffold_25_121305 .
scaffold_25_184475 .
scaffold_25_186525 .
scaffold_25_186530 .
scaffold_25_186615 .
scaffold_25_187603 .
scaffold_25_187820 .
scaffold_25_187861 .
scaffold_25_188640 .
scaffold_25_188859 .
scaffold_25_189004 .
scaffold_25_189792 .
scaffold_25_189823 .
scaffold_25_192092 .
scaffold_25_192107 .
scaffold_25_192610 .
scaffold_25_192625 .
scaffold_25_192656 .
scaffold_25_192676 .
scaffold_25_193419 .
scaffold_25_193683 .
scaffold_25_194247 .
scaffold_25_194700 .
scaffold_25_195279 .
scaffold_25_195553 .
scaffold_25_195576 .
scaffold_25_195727 .
scaffold_25_195826 .
scaffold_25_195935 .
scaffold_25_195978 .
scaffold_25_196017 .
scaffold_25_196026 .
scaffold_25_196065 .
scaffold_25_196074 .
scaffold_25_196122 .
scaffold_25_196152 .
scaffold_25_196738 .
scaffold_25_200665 .
scaffold_25_201203 .
scaffold_25_201424 .
scaffold_25_201439 .
scaffold_25_201684 .
scaffold_25_202021 .
scaffold_25_202278 .
scaffold_25_202553 .
scaffold_25_202556 .
scaffold_25_202829 .
scaffold_25_202944 .
scaffold_25_203105 .
scaffold_25_213867 .
scaffold_25_214314 .
scaffold_25_214384 .
scaffold_25_214386 .
scaffold_25_214664 .
scaffold_25_214710 .
scaffold_25_214711 .
scaffold_25_214750 .
scaffold_25_214802 .
scaffold_25_214835 .
scaffold_25_214879 .
scaffold_25_214891 .
scaffold_25_238236 .
scaffold_25_238265 .
scaffold_25_238492 .
scaffold_25_238666 .
scaffold_25_238715 .
scaffold_25_238720 .
scaffold_25_238751 .
scaffold_25_238772 .
scaffold_25_238807 .
scaffold_25_238849 .
scaffold_25_238883 .
scaffold_25_238885 .
scaffold_25_238940 .
scaffold_25_239099 .
scaffold_25_244645 .
scaffold_25_244699 .
scaffold_25_244941 .
scaffold_25_244954 .
scaffold_25_246819 .
scaffold_25_246861 .
scaffold_25_246877 .
scaffold_25_246903 .
scaffold_25_247101 .
scaffold_25_247812 .
scaffold_25_247815 .
scaffold_25_247920 .
scaffold_25_248231 .
scaffold_25_248285 .
scaffold_25_248515 .
scaffold_25_248624 .
scaffold_25_248643 .
scaffold_25_248666 .
scaffold_25_258188 .
scaffold_25_258251 .
scaffold_25_258300 .
scaffold_25_258407 .
scaffold_25_258560 .
scaffold_25_258577 .
scaffold_25_258959 .
scaffold_25_259026 .
scaffold_25_259277 .
scaffold_25_259346 .
scaffold_25_260131 .
scaffold_25_272159 .
scaffold_25_406100 .
scaffold_25_411423 .
scaffold_25_411483 .
scaffold_25_411508 .
scaffold_25_531736 .
scaffold_25_556134 .
scaffold_25_556197 .
scaffold_25_556208 .
scaffold_25_556263 .
scaffold_25_556391 .
scaffold_25_557158 .
scaffold_25_557452 .
scaffold_25_558898 .
scaffold_25_558908 .
scaffold_25_559074 .
scaffold_25_559096 .
scaffold_25_559174 .
scaffold_25_559444 .
scaffold_25_567330 .
scaffold_25_606334 .
scaffold_25_606358 .
scaffold_25_607239 .
scaffold_25_607263 .
scaffold_25_636758 .
scaffold_25_636802 .
scaffold_25_637384 .
scaffold_25_637857 .
scaffold_25_638111 .
scaffold_25_638153 .
scaffold_25_638309 .
scaffold_25_638360 .
scaffold_25_638455 .
scaffold_25_638579 .
scaffold_25_638599 .
scaffold_25_638831 .
scaffold_25_638834 .
scaffold_25_638879 .
scaffold_25_639332 .
scaffold_25_639378 .
scaffold_25_640014 .
scaffold_25_640025 .
scaffold_25_640277 .
scaffold_502_195 .
scaffold_502_17929 .
scaffold_502_59179 .
scaffold_502_59192 .
scaffold_502_59486 .
scaffold_502_59521 .
scaffold_502_67629 .
scaffold_502_68173 .
scaffold_502_68182 .
scaffold_502_96963 .
scaffold_502_106022 .
scaffold_502_106041 .
scaffold_502_106416 .
scaffold_502_106535 .
scaffold_502_120281 .
scaffold_509_28598 .
scaffold_509_29484 .
scaffold_509_30239 .
scaffold_509_30252 .
scaffold_509_31139 .
scaffold_509_59662 0.008606299
scaffold_509_61199 .
scaffold_509_64533 .
scaffold_509_64602 .
scaffold_509_64644 .
scaffold_509_65419 .
scaffold_509_92801 .
scaffold_509_92886 .
scaffold_509_92919 .
scaffold_509_92941 .
scaffold_509_93157 .
scaffold_67_63206 .
scaffold_67_77350 .
scaffold_67_77432 .
scaffold_67_77466 .
scaffold_67_77714 .
scaffold_67_78709 .
scaffold_67_87043 .
scaffold_67_87169 .
scaffold_67_90371 .
scaffold_67_98980 .
scaffold_67_99999 .
scaffold_67_100018 .
scaffold_67_100036 .
scaffold_67_103973 .
scaffold_67_103993 .
scaffold_67_104506 .
scaffold_67_104535 .
scaffold_67_104578 .
scaffold_67_131620 .
scaffold_67_131649 .
scaffold_67_131655 .
scaffold_67_132628 .
scaffold_67_132652 .
scaffold_67_132677 .
scaffold_67_145113 .
scaffold_67_145184 .
scaffold_67_146362 .
scaffold_67_146854 .
scaffold_67_146877 .
scaffold_67_169880 .
scaffold_780_11876 .
scaffold_780_11892 .
scaffold_780_50549 .
scaffold_780_50589 .
scaffold_780_75954 .
scaffold_780_75965 .
scaffold_780_83278 .
scaffold_780_85135 .
scaffold_780_87469 .
scaffold_780_87488 .
scaffold_780_90170 .
scaffold_820_18504 .
scaffold_820_65666 .
scaffold_820_66347 .
scaffold_820_66388 .
scaffold_820_66432 .
scaffold_820_68792 .
scaffold_820_68844 .
scaffold_820_68938 .
scaffold_820_68951 .
scaffold_820_68982 .
scaffold_820_69022 .
scaffold_820_69553 .
scaffold_820_69665 .
scaffold_820_69977 .
scaffold_820_76153 .
scaffold_820_76500 .
scaffold_820_78478 .
scaffold_820_78596 .
scaffold_820_79852 .
scaffold_820_80063 .
scaffold_820_80326 .
scaffold_820_80332 .
scaffold_820_80368 .
print(best_lambda)
[1] 0.01043408
y_pred <- predict(lasso_model, newx = X_test)
# Calcul du R²
SSE <- sum((y_test - y_pred)^2) # somme des carrés des erreurs
SST <- sum((y_test - mean(y))^2) # somme totale des carrés
R2 <- 1 - SSE/SST
print(sqrt(SSE))
[1] 4.11137
R2
[1] 0.3508067
coef_df <- data.frame(
SNP = rownames(coef(lasso_model)),
Coefficient = as.numeric(coef(lasso_model))
)
coef_nonzero <- coef_df[coef_df$Coefficient != 0, ]
coef_nonzero_dt <- as.data.table(coef_nonzero)
# Top N SNP par valeur absolue des coefficients
N <- 50
coef_nonzero_dt[, abs_coef := abs(Coefficient)]
topN_Lasso <- coef_nonzero_dt[order(-abs_coef)][1:N, .(SNP, Coefficient)]
topN_Lasso
common_snps <- intersect(df_topNPLS$SNP, coef_nonzero$SNP)
# SNP communs
common_snps
[1] "Chr01_1834461" "Chr01_4153800" "Chr01_42899837" "Chr02_10114463" "Chr02_19743607" "Chr02_22637968"
[7] "Chr02_2278350" "Chr02_2872467" "Chr02_4257956" "Chr03_10509513" "Chr03_16762734" "Chr03_19851644"
[13] "Chr04_20270150" "Chr04_23177824" "Chr04_5474624" "Chr04_8190739" "Chr04_8724191" "Chr05_20338179"
[19] "Chr05_6142501" "Chr06_20435964" "Chr07_12245457" "Chr07_12245863" "Chr07_2271365" "Chr08_10723562"
[25] "Chr08_2817232" "Chr08_5239720" "Chr08_7499581" "Chr08_8269878" "Chr09_12546352" "Chr09_12827737"
[31] "Chr09_2269452" "Chr09_9389914" "Chr09_9389916" "Chr10_11603279" "Chr10_12391252" "Chr10_13951310"
[37] "Chr10_16864286" "Chr10_20023115" "Chr10_8950463" "Chr10_8954059" "Chr10_9695555" "Chr11_17972339"
[43] "Chr11_18969957" "Chr12_4109852" "Chr13_11901187" "Chr13_1367659" "Chr13_1486685" "Chr13_4758531"
[49] "Chr14_2168682" "Chr14_4891599" "Chr14_5691154" "Chr14_5767204" "Chr14_8097754" "Chr15_9360139"
[55] "Chr17_10721759" "Chr18_3096178" "Chr18_8374212" "Chr18_8375522"
length(common_snps)
[1] 58
y <- as.numeric(pheno$CIRC2009)
X <- as.matrix(geno)
# Split externe train/test
set.seed(123)
train_index <- createDataPartition(y, p = 0.9, list = FALSE)
X_train <- X[train_index, ]
X_test <- X[-train_index, ]
y_train <- y[train_index]
y_test <- y[-train_index]
# Split interne du train en D1 et D2 : 50/50
set.seed(123)
idx_D1 <- createDataPartition(y_train, p = 0.5, list = FALSE)
X_D1 <- X_train[idx_D1, ]
X_D2 <- X_train[-idx_D1, ]
y_D1 <- y_train[idx_D1]
y_D2 <- y_train[-idx_D1]
alpha <- 1 # Elastic Net si alpha = 0.5, Lasso si = 1, ridge si =0
set.seed(123)
cv_enet <- cv.glmnet(
X_D1, y_D1,
alpha = alpha,
nfolds = 10,
standardize = TRUE
)
lambda_hat <- cv_enet$lambda.min
lambda_hat
[1] 0.2302412
# Ajustement final
enet_model <- glmnet(X_D1, y_D1, alpha = alpha, lambda = 0.002)
coefs <- coef(enet_model)
selected <- which(coefs[-1] != 0) # indices des SNP sélectionnés
S_hat <- selected
cat("Nombre de variables sélectionnées (screening):", length(S_hat), "\n")
Nombre de variables sélectionnées (screening): 170
# Poids dérivés des coefficients Elastic Net
beta_hat <- as.numeric(coefs[-1])
weights <- abs(beta_hat)
weights <- weights[S_hat]
weights <- weights / max(weights) # normalisation
lambda_hat <- 0.002
X_D2_sel <- X_D2[, S_hat, drop = FALSE]
# Pondération Ridge : mettre plus de pénalité sur les petites β_EN
ridge_penalty <- 1 / (weights + 1e-6)
set.seed(123)
cv_ridge <- cv.glmnet(
X_D2_sel, y_D2,
alpha = 0, # alpha = 0 → Ridge
nfolds = 10,
penalty.factor = ridge_penalty
)
lambda_ridge <- cv_ridge$lambda.min
# Ajustement final sur D2
ridge_model <- glmnet(
X_D2_sel, y_D2,
alpha = 0,
lambda = lambda_ridge,
penalty.factor = ridge_penalty
)
# Coefficients ridge finaux (sur D2)
beta_clean <- coef(ridge_model)
X_train_sel <- X_train[, S_hat, drop = FALSE]
ridge_final <- glmnet(
X_train_sel, y_train,
alpha = 0,
lambda = lambda_ridge,
penalty.factor = ridge_penalty
)
# Prédictions sur le test externe
X_test_sel <- X_test[, S_hat, drop = FALSE]
y_pred <- predict(ridge_final, newx = X_test_sel)
# R² externe
R2_test <- 1 - sum((y_test - y_pred)^2) / sum((y_test - mean(y_test))^2)
cat("R2 externe :", R2_test, "\n")
R2 externe : 0.2161655